{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!--\n",
    "    Licensed to the Apache Software Foundation (ASF) under one\n",
    "    or more contributor license agreements. See the NOTICE file\n",
    "    distributed with this work for additional information\n",
    "    regarding copyright ownership. The ASF licenses this file\n",
    "    to you under the Apache License, Version 2.0 (the\n",
    "    \"License\"); you may not use this file except in compliance\n",
    "    with the License. You may obtain a copy of the License at\n",
    "\n",
    "      http://www.apache.org/licenses/LICENSE-2.0\n",
    "\n",
    "    Unless required by applicable law or agreed to in writing,\n",
    "    software distributed under the License is distributed on an\n",
    "    \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
    "    KIND, either express or implied. See the License for the\n",
    "    specific language governing permissions and limitations\n",
    "    under the License.\n",
    "-->\n",
    "\n",
    "# Interactive Beam Examples\n",
    "\n",
    "Builds a small pipeline on the `InteractiveRunner`, inspects intermediate\n",
    "PCollections with `ib.show`, plots the squares and cubes of the input, and\n",
    "computes their averages with a custom `CombineFn`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# matplotlib is only needed for the scatter plot further down.\n",
    "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
    "%pip install -q matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import apache_beam as beam\n",
    "from apache_beam.runners.interactive import interactive_beam as ib\n",
    "from apache_beam.runners.interactive import interactive_runner\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the input range; reused as the x-axis of the plot below.\n",
    "NUM_ELEMENTS = 10\n",
    "\n",
    "p = beam.Pipeline(interactive_runner.InteractiveRunner())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "init_pcoll = p | beam.Create(range(NUM_ELEMENTS))\n",
    "ib.show(init_pcoll)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "squares = init_pcoll | 'Square' >> beam.Map(lambda x: x*x)\n",
    "ib.show(squares)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cubes = init_pcoll | 'Cube' >> beam.Map(lambda x: x**3)\n",
    "ib.show(cubes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot squares and cubes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = p.run()\n",
    "init_list = list(range(NUM_ELEMENTS))\n",
    "\n",
    "# Sorted so that each value lines up with its input in `init_list`\n",
    "# (x*x and x**3 are both increasing on the non-negative input range).\n",
    "squares_list = sorted(result.get(squares))\n",
    "cubes_list = sorted(result.get(cubes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.scatter(init_list, squares_list, label='squares', color='red')\n",
    "ax.scatter(init_list, cubes_list, label='cubes', color='blue')\n",
    "ax.set(title='Squares and cubes of the input', xlabel='input', ylabel='value')\n",
    "ax.legend(loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Average with a custom `CombineFn`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AverageFn(beam.CombineFn):\n",
    "  \"\"\"Combines numeric inputs into their arithmetic mean.\n",
    "\n",
    "  The accumulator is a ``(running_sum, count)`` tuple.\n",
    "  \"\"\"\n",
    "\n",
    "  def create_accumulator(self):\n",
    "    \"\"\"Returns the empty accumulator: a zero sum and a zero count.\"\"\"\n",
    "    return (0.0, 0)\n",
    "\n",
    "  def add_input(self, sum_count, input):\n",
    "    \"\"\"Folds one input value into the ``(sum, count)`` accumulator.\"\"\"\n",
    "    # Named `total` rather than `sum` so the builtin is not shadowed.\n",
    "    total, count = sum_count\n",
    "    return total + input, count + 1\n",
    "\n",
    "  def merge_accumulators(self, accumulators):\n",
    "    \"\"\"Merges several ``(sum, count)`` accumulators into a single one.\"\"\"\n",
    "    sums, counts = zip(*accumulators)\n",
    "    return sum(sums), sum(counts)\n",
    "\n",
    "  def extract_output(self, sum_count):\n",
    "    \"\"\"Returns the mean, or NaN when no input was accumulated.\"\"\"\n",
    "    total, count = sum_count\n",
    "    return total / count if count else float('NaN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "average_square = squares | 'Average Square' >> beam.CombineGlobally(AverageFn())\n",
    "average_cube = cubes | 'Average Cube' >> beam.CombineGlobally(AverageFn())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ib.show(average_square)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ib.show(average_cube)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "p.run()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}